---
title: "Validity Analyses"
author: "Erica Ann Metheney and Lauren Yehle"
header-includes:
    - \usepackage{float}
    - \usepackage{array}
    - \usepackage{multirow}
    - \usepackage{multicol}
    - \usepackage{longtable}
    - \usepackage{dcolumn}
date: \today{}
output:
  pdf_document:
      fig_caption: true
      number_sections: true
      keep_tex: true
      keep_md: yes
geometry: margin=2cm
---

```{r setup, include=FALSE}
require("knitr")
require("summarytools")
knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message = FALSE, comment=NA,prompt = FALSE, cache = FALSE, results = 'asis',fig.width = 8,fig.height = 6,results = 'asis',tidy.opts = list(width.cutoff = 60), tidy = TRUE, dev = "png")
opts_knit$set(root.dir = "C:/Users/xmeter/University of Gothenburg/GLD Projects - Documents/Generative AI and Survey Development/Analyses/Experiment 1 Quantitative")

st_options(bootstrap.css     = FALSE,       # Already part of the theme so no need for it
           plain.ascii       = FALSE,       # One of the essential settings
           style             = "rmarkdown", # Idem.
           dfSummary.silent  = TRUE,        # Suppresses messages about temporary files
           footnote          = NA,          # Keeping the results minimalistic
           subtitle.emphasis = FALSE)       # For the vignette theme, this gives better results.
                                            # For other themes, using TRUE might be preferable.

st_css()
```

```{r required_libraries, echo = FALSE}
library(readxl)
library(plyr)
library(dplyr)
library(lme4)
library(ggplot2)
library(ggmosaic)
library(nlme)
library(merTools)
library(lmerTest)
library(kableExtra)
library(forcats)
library(stargazer)
library(readxl)
library(interactions)
library(formatR)
library(texreg)
library(tidyr)
```

```{r data_import, echo = FALSE}
#...human coded experimental units
#human.codes <- read.csv("ExpertCoding_ExperimentalUnits.csv")
#...qualitatively coded AI output
#dat <- read_excel("coded_AI_output.xlsx")
#...treatment information for AI output
#trtID <- read_excel("GPTresults_with_Treatment.xlsx")

dat <- read_excel("C:/Users/xmeter/University of Gothenburg/GLD Projects - Documents/Generative AI and Survey Development/Data/QualitativeCodes/Nontraining/Experiment 1/EXP1_Output_Anonymized_Nov16_updated coded.xlsx")
trtID <- read_excel("C:/Users/xmeter/University of Gothenburg/GLD Projects - Documents/Generative AI and Survey Development/Data/GPT Output/NonTraining/Anonymize 15 Nov/Exp1_Nov16 updated.xlsx")
human.codes <- read.csv("C:/Users/xmeter/University of Gothenburg/GLD Projects - Documents/Generative AI and Survey Development/Data/ExpertCoding_ExperimentalUnits.csv")
```

```{r data_prep, echo = FALSE}
dat$StatDes = paste(dat$Statement,dat$Description,sep = " ")
trtID$StatDes = paste(trtID$Statement,trtID$Description,sep = " ")

#Remove Treatment with Error
#...E1M1P2G8 was never actually ran. 
#...See E1G3G8
#...The output from E1M1P2G8 was copied and relabeled as E1M1P2G8
#...Therefore we delete the duplicate from the dataset

#...remove from treatment dataframe
indx.trtID = which(trtID$Treatment == "E1M1P2G8")
trtID = trtID[-indx.trtID,]
#...remove from coded dataframe
#...indices obtained by matching Statement and Description
indx.dat = c(7526,5316,4839,2666,370)
dat = subset(dat,!(AnonID %in% indx.dat))

#Merge
dat = merge(dat,trtID,by = "StatDes")

#Clean Up
dat$Statement = dat$Statement.x
dat$Description = dat$Description.x

#Breakdown ID into Constituent Parts
dat$StatementID = substr(dat$ID, nchar(dat$ID)-1,nchar(dat$ID))
dat$TrtQ = sub("S[^S]*$", "", dat$ID)
dat$Model = substr(dat$TrtQ,3,4)
dat$Persona = substr(dat$TrtQ,5,6)
dat$Question = substring(dat$TrtQ,7)

dat$Source = ifelse(startsWith(dat$Question,"L"),"LGPI",NA)
dat$Source[startsWith(dat$Question,"W")]<- "WVS"
dat$Source[startsWith(dat$Question,"G")]<- "Gallup"
dat$Source[startsWith(dat$Question,"Q")]<- "QQ"
dat$Source = factor(dat$Source, levels = c("QQ","LGPI","WVS","Gallup"))

#Fix TrtQ Error
#...They were originally labeled as "E1M1P3L122"
#...found by checking for duplicates in TrtQ in dat
#...we find 10 instances (instead of 5) of E1M1P3L122
#...check the chat logs (E1M1L120L129/chat.html) to identify the correct E1M1P3L122
#...Check TrtQ to find that E1M2P3L122 is missing
#...confirm that duplicate output is E1M2P3L122 in E1M2P3L122/chat.html
dat$TrtQ[which(dat$AnonID == 2817)] <- "E1M2P3L122"
dat$TrtQ[which(dat$AnonID == 6370)] <- "E1M2P3L122"
dat$TrtQ[which(dat$AnonID == 565)] <- "E1M2P3L122"
dat$TrtQ[which(dat$AnonID == 186)] <- "E1M2P3L122"
dat$TrtQ[which(dat$AnonID == 2525)] <- "E1M2P3L122"

dat$Model[which(dat$AnonID == 2817)] <- "M2"
dat$Model[which(dat$AnonID == 6370)] <- "M2"
dat$Model[which(dat$AnonID == 565)] <- "M2"
dat$Model[which(dat$AnonID == 186)] <- "M2"
dat$Model[which(dat$AnonID == 2525)] <- "M2"

dat$Persona[which(dat$AnonID == 2817)] <- "P3"
dat$Persona[which(dat$AnonID == 6370)] <- "P3"
dat$Persona[which(dat$AnonID == 565)] <- "P3"
dat$Persona[which(dat$AnonID == 186)] <- "P3"
dat$Persona[which(dat$AnonID == 2525)] <- "P3"

#Condense Responses within Statements
df = dat %>% group_by(TrtQ) %>% dplyr::summarize(`Code 1` = max(`1 Vague Term or Phrase`,na.rm = TRUE),
                                                 `Code 2` = max(`2 Specialized Knowledge`,na.rm = TRUE),
                                                 `Code 3` = max(`3 Syntax Problem`,na.rm = TRUE),
                                                 `Code 4` = max(`4 Unfair Presumption`,na.rm = TRUE),
                                                 `Code 5` = max(`5 Double Barreled`,na.rm = TRUE),
                                                 `Code 6` = max(`6 Undefined or Ill Reference Period`,na.rm = TRUE),
                                                 `Code 7` = max(`7 Difficult Recall or reference period`,na.rm = TRUE),
                                                 `Code 8` = max(`8 Complex Estimation/Computation`,na.rm = TRUE),
                                                 `Code 9` = max(`9 Sensitive`,na.rm = TRUE),
                                                 `Code 10` = max(`10 Leading/Biasing`,na.rm = TRUE),
                                                 `Code 11` = max(`11 Answer Set`, na.rm = TRUE),
                                                 SysVar= max(`Internal: Systematic Variation`, na.rm = TRUE),
                                                 NOTA = max(`None of the Above`,na.rm = TRUE),
                                                 TrtQ = unique(TrtQ),
                                                 Model = unique(Model),
                                                 Persona = unique(Persona),
                                                 Question = unique(Question),
                                                 Source = unique(Source))

df[df==-Inf]<- 0
df = df %>% dplyr::mutate(NumCodesTrtQ = `Code 1` + `Code 2`+ `Code 3` + `Code 4` + `Code 5` + `Code 6` + `Code 7` + `Code 8` + `Code 9` + `Code 10` + `Code 11`)

df$Trt = paste(df$Model,df$Persona,sep = "")

df$NOTA2 = ifelse(df$NOTA == 1|df$SysVar == 1,1,0)
```



# Question Source Analysis

We first fit a multilevel model with random Question intercept and a Source fixed effect to model the average number of unique codes per treatment-question pair. 

```{r question_source, echo = FALSE}
fit.source <- lmer(NumCodesTrtQ ~ Model + Persona + Source + (1|Question),data = df)
texreg(fit.source,custom.model.names = "Number of Codes",caption.above = TRUE, caption = "Regression Results of Question Source Analysis",label = "reg:Source",float.pos = "htbp")
```

Next, we fit a multilevel model of the likelihood of each code existing with a random Question intercept and Source fixed effect. 

```{r, echo = FALSE}
fit.Code1.source <- lmer(`Code 1` ~ Model + Persona +  Source + (1|Question),data = df)
fit.Code2.source <- lmer(`Code 2` ~ Model + Persona +  Source + (1|Question),data = df)
fit.Code3.source <- lmer(`Code 3` ~ Model + Persona +  Source + (1|Question),data = df)
fit.Code4.source <- lmer(`Code 4` ~ Model + Persona +  Source + (1|Question),data = df)
fit.Code5.source <- lmer(`Code 5` ~ Model + Persona +  Source + (1|Question),data = df)
fit.Code6.source <- lmer(`Code 6` ~ Model + Persona +  Source + (1|Question),data = df)
fit.Code7.source <- lmer(`Code 7` ~ Model + Persona +  Source + (1|Question),data = df)
fit.Code8.source <- lmer(`Code 8` ~ Model + Persona +  Source + (1|Question),data = df)
fit.Code9.source <- lmer(`Code 9` ~ Model + Persona +  Source + (1|Question),data = df)
fit.Code10.source <- lmer(`Code 10` ~ Model + Persona +  Source + (1|Question),data = df)

texreg(list(fit.Code1.source,fit.Code2.source,fit.Code3.source,fit.Code4.source,fit.Code5.source), custom.model.names = c("Code 1","Code 2","Code 3","Code 4","Code 5"),caption.above = TRUE, caption = "Multilevel Regression Results of Question Source Analysis - Likelihood of Codes 1 to 5",label = "reg:Codes1to5Source",float.pos = "htbp")

texreg(list(fit.Code6.source,fit.Code7.source,fit.Code8.source,fit.Code9.source,fit.Code10.source),custom.model.names = c("Code 6","Code 7","Code 8","Code 9", "Code 10"),caption.above = TRUE, caption = "Multilevel Regression Results of Question Source Analysis - Likelihood of Codes 6 to 10",label = "reg:Codes6to10Source",float.pos = "htbp")
```

Lastly, we perform a Holm's correction on the p-values to account for the multiple testing of the 10 codes. 

## WVS with Holm's Correction

```{r, echo = FALSE}
models = list(fit.Code1.source,fit.Code2.source,fit.Code3.source,fit.Code4.source,fit.Code5.source,fit.Code6.source,fit.Code7.source,fit.Code8.source,fit.Code9.source,fit.Code10.source)

library(broom.mixed)
#Get p-values for Model effect (M2)
target_term <- "SourceWVS"
raw_pvals <- sapply(models, function(model) {
  tidy(model, effects = "fixed") |>
    dplyr::filter(term == target_term) |>
    dplyr::pull(p.value)
})

# Apply Holm correction
pvals_adj <- p.adjust(raw_pvals, method = "holm")
pvals_adj_M2 <- p.adjust(raw_pvals, method = "holm")


# Update regression tables
make_texreg_partial_correction <- function(model, adjusted_pval, target_term) {
  tidy_df <- tidy(model, effects = "fixed")
  
  # Replace the p-value for the term of interest
  tidy_df$p.value[tidy_df$term == target_term] <- adjusted_pval

  # Construct texreg object
  texreg::createTexreg(
    coef.names = tidy_df$term,
    coef = tidy_df$estimate,
    se = tidy_df$std.error,
    pvalues = tidy_df$p.value
  )
}

# Apply across all models
texreg_list <- mapply(
  make_texreg_partial_correction,
  model = models,
  adjusted_pval = pvals_adj,
  MoreArgs = list(target_term = target_term),
  SIMPLIFY = FALSE
)


# Output with corrected tables
texreg(texreg_list, custom.model.names = paste0("Code ", 1:10),caption = "Question Source Regression Results with Holm's Multiple Testing Correction on SourceWVS",caption.above = TRUE,label = "reg:Codes1to10SourceWVS",float.pos = "htbp")

```

## Gallup with Holm's Correction

```{r, echo = FALSE}
models = list(fit.Code1.source,fit.Code2.source,fit.Code3.source,fit.Code4.source,fit.Code5.source,fit.Code6.source,fit.Code7.source,fit.Code8.source,fit.Code9.source,fit.Code10.source)

library(broom.mixed)
#Get p-values for Model effect (M2)
target_term <- "SourceGallup"
raw_pvals <- sapply(models, function(model) {
  tidy(model, effects = "fixed") |>
    dplyr::filter(term == target_term) |>
    dplyr::pull(p.value)
})

# Apply Holm correction
pvals_adj <- p.adjust(raw_pvals, method = "holm")
pvals_adj_M2 <- p.adjust(raw_pvals, method = "holm")


# Update regression tables
make_texreg_partial_correction <- function(model, adjusted_pval, target_term) {
  tidy_df <- tidy(model, effects = "fixed")
  
  # Replace the p-value for the term of interest
  tidy_df$p.value[tidy_df$term == target_term] <- adjusted_pval

  # Construct texreg object
  texreg::createTexreg(
    coef.names = tidy_df$term,
    coef = tidy_df$estimate,
    se = tidy_df$std.error,
    pvalues = tidy_df$p.value
  )
}

# Apply across all models
texreg_list <- mapply(
  make_texreg_partial_correction,
  model = models,
  adjusted_pval = pvals_adj,
  MoreArgs = list(target_term = target_term),
  SIMPLIFY = FALSE
)


# Output with corrected tables
texreg(texreg_list, custom.model.names = paste0("Code ", 1:10),caption = "Question Source Regression Results with Holm's Multiple Testing Correction on SourceGallup",caption.above = TRUE,label = "reg:Codes1to10SourceGallup",float.pos = "htbp")

```

# Statement Order Analysis

We begin by computing the average statement placement (1-5) for each code. 

```{r statement_order, echo = FALSE}
dat[is.na(dat)]<- 0
dat$NOTA2 = ifelse(dat$`Internal: Systematic Variation` == 1| dat$`None of the Above` == 1,1,0)
colnames(dat)[c(7:17,33)] = c(paste("Code",1:11,sep = " "),"NOTA2")

dat$SysVar = dat$`Internal: Systematic Variation`
dat$NOTA = dat$`None of the Above`
outcome.vars = c(paste("Code",1:11,sep = " "),"SysVar","NOTA")
statement.order.df  = data.frame(Code = outcome.vars,Average = numeric(13))
dat$StatementNum = as.numeric(dat$StatementNum)
for(i in 1:13){
  hold = subset(dat,get(outcome.vars[i]) == 1)
  statement.order.df[i,2] = mean(hold$StatementNum)
}

kable(statement.order.df,caption = "Average Code Placement")

statement.order.df$Code = factor(statement.order.df$Code,levels = c(paste("Code",1:11,sep = " "),"SysVar","NOTA"))


statement.order.df %>%
  arrange(Average) %>%    # First sort by val. This sort the dataframe but NOT the factor levels
  mutate(Code=factor(Code, levels=Code)) %>%   # This trick update the factor levels
  ggplot( aes(x=Code, y=Average)) +
  geom_segment( aes(xend=Code, yend=1)) +
  geom_point(size=4) +
  theme_light() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.border = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  xlab("") +
  ylab("Average Code Placement")+
  ylim(1,5)+
  theme(axis.text.y = element_text(size = 12),
        axis.text.x = element_text(size = 12))+
  coord_flip()





W = subset(dat,select = c("StatementNum",paste("Code",1:11,sep = " "),"SysVar","NOTA"))
W[W == 0]<- NA
long <- W %>%
  pivot_longer(
    cols = `Code 1`:`NOTA`,
    names_to = "Code",
    values_to = "CodeExists"
  )
long = subset(long,!is.na(CodeExists))
```

Next, we perform an ANOVA test to see if the codes all have the same average placement.

```{r  echo = FALSE,results = 'as is'}
one.way <- aov(StatementNum ~ Code, data = long)

library(papaja)
formatted <- apa_print(one.way)
apa_table(formatted$table,placement = "htbp")
```

The results of the ANOVA test suggest that not all the codes have the same averabge placement. We perform a Tukey HSD test to determine which means are statistically the same and different. 

```{r, echo = FALSE}
tukey.df = as.data.frame(TukeyHSD(one.way, conf.level=.95)$Code)
tukey = TukeyHSD(one.way, conf.level=.95)

kable(tukey.df,col.names = c("Difference","Lower CI","Upper CI","Adjusted p-Value"))  %>% column_spec(1,background = ifelse(tukey.df$`p adj`<0.05, "#79C9C6", "#C1BBB5"))%>%
  kable_styling(latex_options = "HOLD_position")
```


\newpage 

# Comparison to Human Expert

```{r human_comparison, echo = FALSE}
df.M1P1 = subset(df,Trt == "M1P1")
df.M1P2 = subset(df,Trt == "M1P2")
df.M1P3 = subset(df,Trt == "M1P3")
df.M2P1 = subset(df,Trt == "M2P1")
df.M2P2 = subset(df,Trt == "M2P2")
df.M2P3= subset(df,Trt == "M2P3")

# Make sure the row orders of all datasets are the same
Q.order = sort(human.codes$CodeName)
Q.order.special = Q.order[-which(Q.order == "G8")]

df.M1P1 = df.M1P1[match(Q.order, df.M1P1$Question), ]
df.M1P2 = df.M1P2[match(Q.order.special, df.M1P2$Question), ]
df.M1P3 = df.M1P3[match(Q.order, df.M1P3$Question), ]
df.M2P1 = df.M2P1[match(Q.order, df.M2P1$Question), ]
df.M2P2 = df.M2P2[match(Q.order, df.M2P2$Question), ]
df.M2P3 = df.M2P3[match(Q.order, df.M2P3$Question), ]

human.codes.special = subset(human.codes, CodeName != "G8")
human.codes.special = human.codes.special[match(Q.order.special,human.codes.special$CodeName),]
human.codes = human.codes[match(Q.order,human.codes$CodeName),]

human.codes.special[is.na(human.codes.special)]<- 0
human.codes[is.na(human.codes)]<- 0


label_columns <- function(df, col1, col2, label1, label2) {
  vals1 <- df[[col1]]
  vals2 <- df[[col2]]
  result <- ifelse(vals1 == 1 & vals2 == 1, "Both",
                   ifelse(vals1 == 1 & vals2 != 1, paste(label1, "Only"),
                          ifelse(vals1 != 1 & vals2 == 1, paste(label2, "Only"),
                                 "Neither")))
  return(result)
}


# Compare Expert and AI Codes----
expert.col.nums = 4:13
ai.col.nums = 2:11

expert = human.codes[,c(2,expert.col.nums)]
expert.sp = human.codes.special[,c(2,expert.col.nums)]
m1p1 = df.M1P1[,c(17,ai.col.nums)]
m1p2 = df.M1P2[,c(17,ai.col.nums)]
m1p3 = df.M1P3[,c(17,ai.col.nums)]
m2p1 = df.M2P1[,c(17,ai.col.nums)]
m2p2 = df.M2P2[,c(17,ai.col.nums)]
m2p3 = df.M2P3[,c(17,ai.col.nums)]


#...Data Summary
hold = merge(expert,m1p1,by.x = "CodeName",by.y = "Question")
m1p1.comp = data.frame(Question = hold$CodeName,
                       `Code 1` = NA,
                       `Code 2` = NA,
                       `Code 3` = NA,
                       `Code 4` = NA,
                       `Code 5` = NA,
                       `Code 6` = NA,
                       `Code 7` = NA,
                       `Code 8` = NA,
                       `Code 9` = NA,
                       `Code 10` = NA)
for(i in 1:10){
  m1p1.comp[,i+1] = label_columns(hold,i+1,i+11,"Human","AI")
}


hold = merge(expert.sp,m1p2,by.x = "CodeName",by.y = "Question")
m1p2.comp = data.frame(Question = hold$CodeName,
                       `Code 1` = NA,
                       `Code 2` = NA,
                       `Code 3` = NA,
                       `Code 4` = NA,
                       `Code 5` = NA,
                       `Code 6` = NA,
                       `Code 7` = NA,
                       `Code 8` = NA,
                       `Code 9` = NA,
                       `Code 10` = NA)
for(i in 1:10){
  m1p2.comp[,i+1] = label_columns(hold,i+1,i+11,"Human","AI")
}



hold = merge(expert,m1p3,by.x = "CodeName",by.y = "Question")
m1p3.comp = data.frame(Question = hold$CodeName,
                       `Code 1` = NA,
                       `Code 2` = NA,
                       `Code 3` = NA,
                       `Code 4` = NA,
                       `Code 5` = NA,
                       `Code 6` = NA,
                       `Code 7` = NA,
                       `Code 8` = NA,
                       `Code 9` = NA,
                       `Code 10` = NA)
for(i in 1:10){
  m1p3.comp[,i+1] = label_columns(hold,i+1,i+11,"Human","AI")
}


hold = merge(expert,m2p1,by.x = "CodeName",by.y = "Question")
m2p1.comp = data.frame(Question = hold$CodeName,
                       `Code 1` = NA,
                       `Code 2` = NA,
                       `Code 3` = NA,
                       `Code 4` = NA,
                       `Code 5` = NA,
                       `Code 6` = NA,
                       `Code 7` = NA,
                       `Code 8` = NA,
                       `Code 9` = NA,
                       `Code 10` = NA)
for(i in 1:10){
  m2p1.comp[,i+1] = label_columns(hold,i+1,i+11,"Human","AI")
}



hold = merge(expert,m2p2,by.x = "CodeName",by.y = "Question")
m2p2.comp = data.frame(Question = hold$CodeName,
                       `Code 1` = NA,
                       `Code 2` = NA,
                       `Code 3` = NA,
                       `Code 4` = NA,
                       `Code 5` = NA,
                       `Code 6` = NA,
                       `Code 7` = NA,
                       `Code 8` = NA,
                       `Code 9` = NA,
                       `Code 10` = NA)
for(i in 1:10){
  m2p2.comp[,i+1] = label_columns(hold,i+1,i+11,"Human","AI")
}



hold = merge(expert,m2p3,by.x = "CodeName",by.y = "Question")
m2p3.comp = data.frame(Question = hold$CodeName,
                       `Code 1` = NA,
                       `Code 2` = NA,
                       `Code 3` = NA,
                       `Code 4` = NA,
                       `Code 5` = NA,
                       `Code 6` = NA,
                       `Code 7` = NA,
                       `Code 8` = NA,
                       `Code 9` = NA,
                       `Code 10` = NA)
for(i in 1:10){
  m2p3.comp[,i+1] = label_columns(hold,i+1,i+11,"Human","AI")
}

#....Statistics Overall
percentage_table <- function(df) {
  # Exclude the first column
  df_values <- df[, -1, drop = FALSE]
  # Flatten to a vector
  values <- as.vector(as.matrix(df_values))
  # Get counts
  counts <- table(values)
  # Calculate percentages
  percentages <- round(100 * counts / length(values), 2)
  # Desired order
  order_vec <- c("Both", "Neither", "AI Only", "Human Only")
  # Create result data frame in desired order
  result <- data.frame(
    Value = order_vec,
    Percentage = as.numeric(percentages[order_vec])
  )
  # Replace NA with 0 for missing categories
  result$Percentage[is.na(result$Percentage)] <- 0
  return(result)
}


m1p1.stat = percentage_table(m1p1.comp)
m1p2.stat = percentage_table(m1p2.comp)
m1p3.stat = percentage_table(m1p3.comp)
m2p1.stat = percentage_table(m2p1.comp)
m2p2.stat = percentage_table(m2p2.comp)
m2p3.stat = percentage_table(m2p3.comp)


overall.comb = data.frame(Which = c("Both", "Neither", "AI Only", "Human Only"),
                          M1P1 = NA,
                          M1P2 = NA,
                          M1P3 = NA,
                          M2P1 = NA,
                          M2P2 = NA,
                          M2P3 = NA)
overall.comb[,2] <- m1p1.stat[,2]
overall.comb[,3] <- m1p2.stat[,2]
overall.comb[,4] <- m1p3.stat[,2]
overall.comb[,5] <- m2p1.stat[,2]
overall.comb[,6] <- m2p2.stat[,2]
overall.comb[,7] <- m2p3.stat[,2]


df <- tribble(
  ~Which, ~M1P1, ~M1P2, ~M1P3, ~M2P1, ~M2P2, ~M2P3,
  "Both", 10.00, 10.11, 9.58, 11.15, 11.49, 11.07,
  "Neither", 72.10, 70.84, 71.68, 68.05, 66.56, 68.21,
  "AI Only", 11.68, 12.91, 12.10, 15.73, 17.21, 15.57,
  "Human Only", 6.22, 6.13, 6.64, 5.08, 4.73, 5.15
)

df$Which = factor(df$Which,levels = c("Both","Human Only","AI Only","Neither"))

df %>%
  pivot_longer(-Which, names_to = "Condition", values_to = "Value") %>%
  ggplot(aes(x = Which, y = Value, fill = Condition)) +
  geom_col(position = position_dodge()) +
  scale_fill_manual(values = c("#FF9999", "#FF4D4D", "#B30000", "#99CCFF", "#4D94FF", "#003399"))+
  ylim(0,100)+
  labs(x = "Code Given By",y = "Percent",fill = "Experimental\nCondition") +
  theme_minimal()+
  theme(legend.position = "bottom",
        legend.box = "horizontal",
        legend.title = element_text(size = 10),
        legend.background = element_rect(),
        axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 10))+
  guides(fill = guide_legend(nrow = 2, byrow = TRUE))

ggsave("PerformanceComparison.png",dpi = 300)

#...Statistics by Code
percentage_table_by_column <- function(df) {
  # Exclude the first column
  df_values <- df[, -1, drop = FALSE]
  # Desired order
  order_vec <- c("Both", "Neither", "AI Only", "Human Only")
  # Initialize result list
  result_list <- lapply(seq_along(df_values), function(i) {
    col_values <- df_values[[i]]
    counts <- table(col_values)
    percentages <- round(100 * counts / length(col_values), 2)
    perc_vec <- as.numeric(percentages[order_vec])
    perc_vec[is.na(perc_vec)] <- 0
    data.frame(
      Column = names(df_values)[i],
      Value = order_vec,
      Percentage = perc_vec
    )
  })
  # Combine results
  result <- do.call(rbind, result_list)
  rownames(result) <- NULL
  return(result)
}


m1p1.stat.code = percentage_table_by_column(m1p1.comp)
m1p2.stat.code = percentage_table_by_column(m1p2.comp)
m1p3.stat.code = percentage_table_by_column(m1p3.comp)
m2p1.stat.code = percentage_table_by_column(m2p1.comp)
m2p2.stat.code = percentage_table_by_column(m2p2.comp)
m2p3.stat.code = percentage_table_by_column(m2p3.comp)

overall.stat.code = data.frame(Code = m1p1.stat.code$Column,
                               Which = m1p1.stat.code$Value,
                               M1P1 = m1p1.stat.code$Percentage,
                               M1P2 = m1p2.stat.code$Percentage,
                               M1P3 = m1p3.stat.code$Percentage,
                               M2P1 = m2p1.stat.code$Percentage,
                               M2P2 = m2p2.stat.code$Percentage,
                               M2P3 = m2p3.stat.code$Percentage)


df_long <- overall.stat.code %>%
  pivot_longer(cols = starts_with("M"), names_to = "Experimental\nCondition", values_to = "Value")

ggplot(df_long, aes(x = `Experimental\nCondition`, y = Value, fill = Which)) +
  geom_col(position = "dodge") +
  facet_wrap(~Code) +
  labs(y = "Percentage", x = "Experimental\nCondition", title = "") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90))+
  scale_fill_brewer(palette = "Set2")
```

